## Daniel J. Mallinson
## Reproduction Script for "Suicide Trends and Prevention in Rural Pennsylvania Counties and Schools"
## 2020-12-01
## 2021-06-02

rm(list = ls())

install.packages("zoo", "reshape", "panelAR", "car")

library(foreign)
library(zoo)
library(reshape)
library(panelAR)
library(car)

data <- read.csv("Final Dataset_20210107.csv")
time <- read.csv("over_time_data.csv") #Use for US and PA data
overtime <- read.csv("overtimedata.csv") #Use for Rural vs. Urban data

countynames <- unique(data$county)

## Interpolation for two variables with missing values

for(i in 1:67){
	a <- data[which(data$county==countynames[i]),]
	a$college_pct <- na.approx(a$college_pct, na.rm=FALSE, rule=2)
	a$married_pct <- na.approx(a$married_pct, na.rm=FALSE, rule=2)
	if(i == 1){
	out <- a
	}else{
	out <- rbind(out,a)
	}
}

data <- out

## Plot Suicide Rates Over Time (Figure 1)

rates <- time[c("CountyState", "Year", "suicides_per1000", "rural")]

melt <- melt(rates, id=c("CountyState", "Year", "rural"))
cast <- cast(melt, variable~rural+Year, mean)

png("figure1.png", height=4, width=7, units="in", res=600)
plot.new()
par(mar=c(4,4,1,1))
plot.window(xlim=c(1990, 2020), ylim=c(0,25))
lines(ts(overtime$urban*100, start=1990, frequency=1), col="blue", lty=2, lwd=2)
lines(ts(overtime$rural*100, start=1990, frequency=1), col="red", lty=3, lwd=2)
#lines(ts(as.numeric(cast[1,60:88]*100), start=1990, frequency=1), col="black", lty=1, lwd=2)
axis(1, at=seq(1990,2020,5), labels=seq(1990,2020,5), font=2)
axis(2, at=seq(0,25,5), labels=seq(0,25,5), font=2, las=2)
title(main="", ylab="Suicides per 100,000 residents")
legend(1990, 25, legend=c( "Rural Counties","Urban Counties"), col=c("red", "blue"), lty=3:2, cex=0.8, bty="n")
dev.off()

png("figure2.png", height=4, width=7, units="in", res=600)
plot.new()
par(mar=c(4,4,1,1))
plot.window(xlim=c(1990, 2020), ylim=c(0,25))
lines(ts(as.numeric(cast[1,89:117]*100), start=1990, frequency=1), col="black", lty=1, lwd=2)
lines(ts(as.numeric(cast[1,60:88]*100), start=1990, frequency=1), col="darkgreen", lty=2, lwd=2)
axis(1, at=seq(1990,2020,5), labels=seq(1990,2020,5), font=2)
axis(2, at=seq(0,25,5), labels=seq(0,25,5), font=2, las=2)
title(main="", ylab="Suicides per 100,000 residents")
legend(1990, 25, legend=c( "United States","Pennsylvania"), col=c("black", "darkgreen"), lty=1:2, cex=0.8, bty="n")
dev.off()

## Regression Model of County Suicide Rates

model <- panelAR(suicides_per1000*100 ~ rural, data=data, panelVar="county", timeVar="year", 
	panelCorrMethod="pcse", singular.ok=TRUE, autoCorr="ar1", 
	complete.case=TRUE)
summary(model)

model2 <- panelAR(suicides_per1000*100 ~ rural + handguns_per1000 + male_pct + white_pct + 
	unemployment_pct + college_pct + married_pct + log(medhouseinc_AFI) + 
	age65plus, data=data, panelVar="county", timeVar="year", 
	panelCorrMethod="pcse", singular.ok=TRUE, autoCorr="ar1", 
	complete.case=TRUE)
summary(model2)

model3 <- panelAR(suicides_per1000*100 ~ rural + handguns_per1000 + 
	unemployment_pct + college_pct + log(medhouseinc_AFI) + 
	age65plus, data=data, panelVar="county", timeVar="year", 
	panelCorrMethod="pcse", singular.ok=TRUE, autoCorr="ar1", 
	complete.case=TRUE)
summary(model3)

## Check multicollinearity

checkmc <- lm(suicides_per1000*100 ~ rural + handguns_per1000 + male_pct + white_pct + 
	unemployment_pct + college_pct + married_pct + log(medhouseinc_AFI) + 
	age65plus, data=data)
vif(checkmc)

## Remove income for final model

model4 <- panelAR(suicides_per1000*100 ~ rural + handguns_per1000 + 
	unemployment_pct + college_pct + 
	age65plus, data=data, panelVar="county", timeVar="year", 
	panelCorrMethod="pcse", singular.ok=TRUE, autoCorr="ar1", 
	complete.case=TRUE)
summary(model4)

## Broadband model
broadband <- read.csv("broadband_combined_20210113.csv")

model5 <- lm(suicides_per1000*100 ~ nobroadband_pct, data=broadband)
summary(model5)

model6 <- lm(suicides_per1000*100 ~ nobroadband_pct + handguns_per1000 + 
		age65plus, data=broadband)
summary(model6)
